Guided Project: Visualizing Pixar's Roller Coaster
Posted on Wed 08 July 2015 in Projects
Introduction to the data¶
In [36]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the Pixar dataset from the CSV file in the working directory.
pixar_movies = pd.read_csv("PixarMovies.csv")
# Row count (length of the frame along its first axis)
print(len(pixar_movies))
In [37]:
# Column count, read from the column index rather than from `shape`
print(len(pixar_movies.columns))
In [38]:
# Preview the first 15 rows of the dataset.
pixar_movies.head(n=15)
Out[38]:
In [22]:
# Inspect the dtype of every column in the DataFrame.
pixar_movies.dtypes
Out[22]:
Data cleaning¶
In [39]:
# Both revenue-share columns go through the same cleanup: the `.str` accessor
# applies `rstrip("%")` to every value to drop the trailing percent sign, and
# `astype()` then casts the column to the float data type.
for pct_column in ("Domestic %", "International %"):
    pixar_movies[pct_column] = pixar_movies[pct_column].str.rstrip("%").astype("float")
In [40]:
# Scale the `IMDB Score` column by a factor of 10.
# NOTE: this overwrites the column, so running the cell twice scales it twice.
pixar_movies["IMDB Score"] = pixar_movies["IMDB Score"].mul(10)
In [41]:
# Create a new DataFrame containing only the first 14 rows.
# `iloc` selects by position, so this does not depend on the index labels being
# 0..N, and `.copy()` makes the result an independent frame so that later
# changes to `filtered_pixar` (e.g. setting its index) never touch, or warn
# about, the parent `pixar_movies`.
filtered_pixar = pixar_movies.iloc[:14].copy()
In [44]:
# Set the `Movie` column as the index for both DataFrames.
# The non-inplace form rebinds each name to a new frame instead of mutating the
# existing object (`filtered_pixar` was derived from a slice of `pixar_movies`).
# The column check makes the cell safe to re-run: once `Movie` is already the
# index, the column is gone and a second `set_index("Movie")` would raise KeyError.
if "Movie" in pixar_movies.columns:
    pixar_movies = pixar_movies.set_index("Movie")
if "Movie" in filtered_pixar.columns:
    filtered_pixar = filtered_pixar.set_index("Movie")
In [45]:
pixar_movies
Out[45]:
Data visualization, line plots¶
In [59]:
# Keep only the three review-score columns, then draw them as a line plot
# (one line per column, using the frame's index for the x-axis).
critics_reviews = pixar_movies.loc[:, ["RT Score", "Metacritic Score", "IMDB Score"]]
critics_reviews.plot.line()
Out[59]:
In [60]:
# Same line plot as before, drawn on a larger 10x6 inch figure.
critics_reviews.plot.line(figsize=(10, 6))
Out[60]:
Data visualization, box plot¶
In [66]:
# Box plot of the three review-score columns, one box per column.
pixar_movies[["RT Score", "Metacritic Score", "IMDB Score"]].plot.box()
Out[66]:
In [68]:
# Same box plot of the three review-score columns, on a 9x5 inch figure.
pixar_movies[["RT Score", "Metacritic Score", "IMDB Score"]].plot.box(figsize=(9, 5))
Out[68]:
Data visualization, stacked bar plots¶
In [96]:
# Select the two revenue-share columns from the filtered frame and stack them
# in a single bar per row of the frame.
revenue_proportions = filtered_pixar.loc[:, ["Domestic %", "International %"]]
revenue_proportions.plot.bar(stacked=True, figsize=(7, 6))
Out[96]:
Next steps¶
In [65]:
# Grouped (side-by-side) bars comparing Oscar nominations with Oscars won.
filtered_pixar[["Oscars Nominated", "Oscars Won"]].plot.bar(figsize=(10, 6))
Out[65]: